clear
do "...\First.do"

********************************************************************************
* Follow the Kjaergaard algorithm to match patients and doctors
********************************************************************************

* Load the GP service file (the global macro raw must already be defined)
use "$raw\gp_all_new3.dta", clear

* Step 1: Restrict services and recode the time of services
*** The restrictions below are done in SAS, so they stay commented out here

* Keep GPs
*keep if speciale=="80"

* Keep Group-1 insured patients
*keep if sikgrup=="1"

* Drop kids
*drop if barnmak=="1"

* Keep own patients
*tab pattyp
*drop if inlist(pattyp,"0", "00", "02", "03", "04")
*drop if inlist(pattyp,"11", "12", "2", "3", "4")


* Recode time:
* Split the string codes afrper and honuge into their first two characters
* and their characters 3-4.
* NOTE(review): presumably characters 1-2 are a two-digit year and characters
* 3-4 a month (afrper) or week (honuge) -- confirm against the register docs.
generate year_afr    = substr(afrper, 1, 2)
generate year_honuge = substr(honuge, 1, 2)
generate month       = substr(afrper, 3, 2)
generate week        = substr(honuge, 3, 2)

* Case 1: the two leading year codes disagree -> move year one back
replace year = year - 1 if year_afr != year_honuge

* Case 2: the year codes agree, but a week code of 51-53 is paired with a
* month code of 01 or 02 -> move year one back as well.
* The two cases are mutually exclusive, so no row is shifted twice.
replace year = year - 1 ///
	if year_afr == year_honuge ///
	& inlist(week, "51", "52", "53") ///
	& inlist(month, "01", "02")

* month and week are kept for the later steps; the year codes are not needed
drop year_afr year_honuge
*tab year

* Step 3: Exclude service weeks with multiple practices
sort pnr honuge
drop if missing(pnr)

* Remove rows whose ydltid code is "2" through "9", then drop the two
* columns that are not used afterwards
drop if inlist(ydltid, "2", "3", "4", "5", "6", "7", "8", "9")
drop ydltid henvisni

* Collapse fully identical rows before counting rows per patient-week
duplicates drop

* Any (pnr, honuge) combination that still has more than one row is removed
* entirely (all of its rows, not just the surplus ones)
bysort pnr honuge: drop if _N > 1

* Only rows with ydtyp equal to 5 are retained; rows with missing ydtyp are
* removed too, exactly as with a "not equal to 5" drop
keep if ydtyp == 5
drop ydtyp

duplicates drop


* Step 4: Code preliminary practice time intervals

* Day offset from 1 January of `year' to the Wednesday of the Sunday-Saturday
* week that contains 1 January (dow() returns 0 for Sunday ... 3 for Wednesday)
generate wednesday_week1 = 3 - dow(mdy(1, 1, year))

* Numeric copy of the two-character week code
destring week, generate(week2)

* Service date = Wednesday of the given week. For the listed years the
* result is moved one further week ahead (inlist() evaluates to 0 or 1).
generate date = mdy(1, 1, year) + wednesday_week1 + 7*(week2 - 1) ///
	+ 7*inlist(year,1993,1994,1999,2000,2005,2010,2011,2016)
format date %td

drop if missing(date)
drop week2

sort pnr date

* An interval starts at a patient's first record and at every record whose
* practice number differs from that of the preceding record
bysort pnr (date): generate GP_from = date ///
	if _n == 1 | ydernr != ydernr[_n-1]

* Only the interval-opening records are kept
keep if !missing(GP_from)
format GP_from %td

* Hard upper bound for every interval
local enddate = mdy(12, 31, 2019)

* An interval runs until the day before the next interval begins, capped at
* the bound above. min() ignores missing arguments, so a patient's last
* interval ends exactly at the bound.
bysort pnr (GP_from): generate GP_until = min(GP_from[_n+1] - 1, `enddate')
format GP_until %td

keep ydernr pnr year date GP_from GP_until wednesday_week1

* Step 6: Recode practice time intervals taking practice closures into account
** Variable names differ from those used in the algorithm description

* Attach the practice-level closure file; rows that exist only in the using
* file are not kept
merge m:1 ydernr using "$work\gp_closures_year.dta", keep(master match) nogenerate
drop first_gp

* last_gp is passed through dofw(), i.e. it is treated as a Stata weekly date
* NOTE(review): presumably last_gp is the last week with services of the
* practice -- confirm against the script that builds gp_closures_year.dta
g help_date=dofw(last_gp)
format help_date %td

g help_year=year(help_date)
g help_week=week(help_date)

* Rebuild the date of the last service as the Wednesday of week help_week in
* help_year, using the same construction as for the service dates in step 4.
* FIX: the Wednesday offset and the one-week shift must both be based on the
* closure year (help_year). Previously they were taken from the observation's
* own year (wednesday_week1 and year), so lastservice was wrong, and differed
* between rows of the same practice, whenever the two years were not equal.
g help_wednesday=3-dow(mdy(01,01,help_year))

gen lastservice=mdy(01,01,help_year)+help_wednesday+7*(help_week-1)
replace lastservice=lastservice+7 ///
	if inlist(help_year,1993,1994,1999,2000,2005,2010,2011,2016)

format lastservice %td
drop wednesday_week1 help_wednesday help_date help_year help_week
sort pnr GP_from

* Identify practice time intervals with practice closure
* ind: the last service date of the practice falls inside the interval.
* A missing lastservice compares as larger than any date, so it yields 0.
gen ind=(GP_from<=lastservice & lastservice<=GP_until)

* ind2: the directly preceding interval of the same patient satisfies the
* ind condition and this interval starts the day after it ends
bys pnr (GP_from): gen ind2=(GP_from[_n-1]<=lastservice[_n-1]  ///
	& lastservice[_n-1]<=GP_until[_n-1] ///
	& GP_from[_n]==GP_until[_n-1]+1 & _n>1)


* Cut the closing interval at the last service date ...
replace GP_until=lastservice if ind==1
* ... and let the following interval start the day after. ind2==1 implies
* the previous row belongs to the same patient; the data are still in the
* pnr/GP_from order established by the bysort above.
replace GP_from=lastservice[_n-1]+1 if ind2==1



* Step 8: Drop small practice time intervals (< 31 days)

* temp_until marks the end of each unbroken run of intervals: it is set on a
* patient's last interval and on any interval followed by a gap before the
* next one starts
bys pnr (GP_from): gen temp_until=GP_until ///
	if _n==_N | (GP_until<GP_from[_n+1]-1 & _n<_N)

* Walk through each patient's intervals from latest to earliest and carry the
* run end backwards, so every interval knows the end of its own run
gsort +pnr -GP_until

bys pnr: replace temp_until=temp_until[_n-1] ///
	if temp_until==. & _n>1
sort pnr GP_from

* Remove intervals shorter than 31 days
drop if GP_until-GP_from<31

* Close the holes left by the removed intervals.
* (a) The next remaining interval is in the same run: extend up to the day
*     before it starts.
by pnr: replace GP_until=GP_from[_n+1]-1 ///
	if temp_until==temp_until[_n+1] & _n<_N

* (b) The interval is now the last one of its run (the patient's last row, or
*     the next row belongs to another run): extend to the end of the run.
* FIX: the second condition used to read _n>_N, which can never be true, so
* run-ending intervals that were not the patient's last row were never
* extended to temp_until.
by pnr: replace GP_until=temp_until ///
	if _n==_N | (temp_until!=temp_until[_n+1] & _n<_N)
	
	
* Step 9: Recode practice time intervals to monthly intervals
* Start: first day of the month that contains GP_from.
* End:   the day before the first of the month that contains GP_until,
*        i.e. the last day of the preceding month.
generate GP_from_month  = mdy(month(GP_from), 1, year(GP_from))
generate GP_until_month = mdy(month(GP_until), 1, year(GP_until)) - 1

format GP_from GP_until GP_from_month GP_until_month %td

* Reduce to the patient-practice interval file and store it
keep ydernr pnr year GP_from_month GP_until_month
sort pnr GP_from_month

save "$work\patient_gp_new.dta", replace



********************************************************************************
* Convert to yearly observations
********************************************************************************
use "$work\patient_gp_new.dta", clear

sort pnr year

* Difference between the calendar years of interval end and interval start
generate dif = year(GP_until_month) - year(GP_from_month)
tabulate dif

* One helper column per calendar year the interval reaches into
* (start year + 0, + 1, ..., up to + 30); left missing beyond the end year
forvalues offset = 0/30 {
	generate year_`offset' = year(GP_from_month) + `offset' if dif >= `offset'
}

* One row per interval and helper column
reshape long year_, i(pnr ydernr GP_from_month) j(n)

* Discard the unused helper rows; the covered year replaces the old year
drop if year_ == .
drop year n
rename year_ year

sort pnr year GP_from_month

* Keep the last doctor seen within each year: of several intervals touching
* the same patient-year, the one with the latest start month is kept
bysort pnr year (GP_from_month): keep if _n == _N
keep if inrange(year, 1990, 2019)

* Diagnostic: tabulate the number of rows per patient-year
bysort pnr year: generate N = _N
tabulate N
drop N


save "$work\patient_gp_year.dta", replace

* Number of rows per practice and year in the data currently in memory,
* stored as N_patients
bysort ydernr year: generate N_patients = _N

* Reduce to one row per practice-year and store the counts
keep ydernr year N_patients
duplicates drop

save "$work\N_patients.dta", replace